# -*- coding: utf-8 -*-#

# -------------------------------------------------------------------------------
# Name:         job51_utils
# Description:  提取51job页面的内容
# Author:       zhuchunxu
# Date:         2020/11/3
# -------------------------------------------------------------------------------
import datetime
import re
import string
from logging import getLogger

from lxml import etree
from lxml.html import tostring

from spider_source.items import Job


class Pzhrc1Utils:
    """Helpers for scraping listing pages served from http://www.pxrc.com.cn."""

    def __init__(self):
        # Logger named after this module so its output can be filtered.
        self.logger = getLogger(__name__)

    def get_page_detail_links(self, response):
        """Collect the detail-page URLs referenced by the current listing page.

        Args:
            response: Object exposing the page HTML as ``response.text``.

        Returns:
            list: Absolute detail-page URLs (str), in document order. Rows
            that carry no ``onclick`` attribute are skipped.
        """
        html = etree.HTML(response.text)
        rows = html.xpath("//div[@class='redian3']/table/tbody/tr")

        links = []
        # The first and last rows are never link rows here (presumably a
        # header row and a pagination/footer row -- confirm against the page).
        for row in rows[1:-1]:
            onclick_values = row.xpath("./@onclick")
            if not onclick_values:
                # Previously this raised IndexError and aborted the whole
                # page; skip the row and leave a trace instead.
                self.logger.warning(
                    "Skipping listing row without an onclick attribute"
                )
                continue
            # Drop 13 leading and 2 trailing characters to isolate the path;
            # this matches a handler shaped like window.open('<path>')
            # (assumed from the offsets -- verify against the live markup).
            path = onclick_values[0].strip()[13:-2]
            links.append("http://www.pxrc.com.cn" + path)

        # Use the logger rather than print so callers control the verbosity.
        self.logger.debug("Detail links on current page: %s", links)
        return links


